Source code for BigDFT.PostProcessing

"""A module for post processing BigDFT calculations.

"""


def _system_command(command, options):
    """
    Run the command as ``command + " " + options`` via ``subprocess.call``.

    Args:
       command (str): the actual command to run.
       options (str): the options to pass to the command.
    """
    from subprocess import call

    command_str = command + " " + options
    call(command_str, shell=True)


def _get_datadir(log):
    """
    Given a log file, this returns the path to the data directory.

    Args:
      log (Logfile): logfile from a BigDFT run.
    Returns:
      str: path to the data directory.
    """
    from os.path import join
    return join(log.srcdir, log.data_directory)


def _get_target(pv, cutoff, rand):
    """
    Pick the fragment to merge next: by default the least pure fragment;
    with ``rand`` set, a random fragment drawn from those above the
    cutoff, weighted by their impurity.
    """
    from numpy.random import choice

    # Find the least pure fragment to modify
    target = min(pv, key=pv.get)
    target_purity = pv[target]

    if abs(target_purity) >= cutoff and rand:
        target_list = [pid for pid in pv if abs(pv[pid]) > cutoff]
        plist = [abs(x) for x in pv.values() if abs(x) > cutoff]
        normalize = sum(plist)
        plist = [x/normalize for x in plist]
        target = choice(target_list, p=plist)
    return target, target_purity
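
# A toy illustration of the selection rule in ``_get_target`` (hedged;
# the purity dictionary is hypothetical). The most negative purity value
# marks the least pure fragment, and with ``rand=True`` any fragment past
# the cutoff can be drawn, weighted by its impurity:
#
#   pv = {"FRAG:0": -0.02, "FRAG:1": -0.30, "FRAG:2": -0.10}
#   _get_target(pv, cutoff=0.05, rand=False)  # -> ("FRAG:1", -0.30)
#   _get_target(pv, cutoff=0.05, rand=True)   # -> FRAG:1 or FRAG:2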


def _get_distance_dict(tree, frag_lookup, target, target_frag, distcut, k):
    # Build a list of nearby atoms for consideration
    ndist, nearest = tree.query(target_frag, distance_upper_bound=distcut, k=k)
    ndist = [x for sub in ndist for x in sub]
    nearest = [x for sub in nearest for x in sub]

    # Update that into a list of nearby fragments, and keep track of
    # the closest fragment
    distdict = {}
    for idx, dist in zip(nearest, ndist):
        try:
            fragidx = frag_lookup[idx]
        except IndexError:
            # kdtree returns invalid indices if it can't find enough
            # points.
            continue
        if fragidx == target:
            continue
        if fragidx not in distdict:
            distdict[fragidx] = dist
        elif distdict[fragidx] > dist:
            distdict[fragidx] = dist
    return distdict


class BigDFTool(object):
    """
    This class defines a number of post-processing actions, including
    those that can be driven by utilities like ``bigdft-tool``,
    ``memguess``, or ``utilities``.

    Args:
      omp (int): number of OpenMP threads. It defaults to the
        $OMP_NUM_THREADS variable in the environment, if present,
        otherwise it fixes the run to 1 thread.
      mpi_run (str): define the MPI command to be used. It defaults to
        the value $BIGDFT_MPIRUN of the environment, if present.
    """

    def __init__(self, omp=None, mpi_run=None):
        from futile.YamlArgparse import get_python_function
        from futile import YamlIO
        from os import environ
        from os.path import join, dirname
        from functools import partial
        from copy import deepcopy

        # Executables
        self.bigdftroot = environ.get('BIGDFT_ROOT')
        if not omp:
            omp = environ.get('OMP_NUM_THREADS', '1')
        if not mpi_run:
            mpi_run = environ.get("BIGDFT_MPIRUN", '')

        # Load the dictionary that defines all the operations.
        try:
            with open(join(dirname(__file__),
                           "Database", "postprocess.yaml")) as ifile:
                db = YamlIO.load(stream=ifile)[0]
        except Exception as e:
            print(e)
            raise IOError("Couldn't find or load postprocess.yaml. "
                          "Check your python path. "
                          f"The exception raised was {e}")

        # Create the subroutines of the class from the dictionary
        if self.bigdftroot is not None:
            for action, spec in db.items():
                naction = action.replace("_", "-")
                nspec = deepcopy(spec)
                cmd = join(self.bigdftroot, nspec["tool"])
                if nspec["mpi"]:
                    cmd = mpi_run + " " + cmd
                else:
                    nspec["args"]["mpirun"] = {"default": mpi_run}
                nspec["args"]["action"] = {"default": naction}
                my_action = get_python_function(
                    partial(self._invoke_command, cmd), action, nspec)
                setattr(self, action, my_action)

    def _auto_fragment(self, msys, cutoff, verbose, rand, view, criteria):
        from scipy.spatial import KDTree

        # Setup one big kdtree and reverse lookup.
        poslist = []
        posdict = {}
        frag_lookup = []
        # Note: iterate over the view rather than msys, because the view
        # may cover only a subset of the system.
        for fragid in view.purities:
            posdict[fragid] = []
            for at in msys[fragid]:
                pos = at.get_position()
                poslist.append(pos)
                posdict[fragid].append(pos)
                frag_lookup.append(fragid)
        tree = KDTree(poslist)

        fragment_mapping = {frag: [] for frag in view.purities}
        starting_length = len(fragment_mapping)

        # Cutoff for how far to look for a fragment to add.
        distcut = 10.0
        k = 10

        # Zero the internal bond order so we never merge a fragment
        # with itself.
        for fragid in view.bond_orders:
            view.bond_orders[fragid][fragid] = 0

        for i in range(0, starting_length - 1):
            if verbose and starting_length > 20:
                step = int(starting_length / 20.0)
                if i % step == 0:
                    print(str(5 * (i / step)) + "% done",
                          len(fragment_mapping))
            target, target_purity = _get_target(view.purities, cutoff,
                                                rand)
            if abs(target_purity) < cutoff:
                break

            distdict = _get_distance_dict(tree, frag_lookup, target,
                                          posdict[target], distcut, k)

            # In case nothing was found, widen the search distance.
            if len(distdict.keys()) == 0:
                distcut *= 2
                k *= 2
                continue

            # Find the neighbor to add in
            if criteria == 'distance':
                addition = min(distdict, key=distdict.get)
            elif criteria == 'bondorder':
                bo_target = {x: view.bond_orders[target][x]
                             for x in distdict}
                addition = max(bo_target, key=bo_target.get)
            else:
                raise ValueError("Criteria must be distance or bondorder")

            # New purity values.
            new_purity = view.bond_orders[target][addition] + \
                view.bond_orders[addition][target]
            new_purity += view.purities[target] * \
                view.charges[target] / 2.0
            new_purity += view.purities[addition] * \
                view.charges[addition] / 2.0
            new_purity *= 2.0 / (view.charges[target] +
                                 view.charges[addition])
            view.purities[target] = new_purity
            posdict[target] = posdict[target] + posdict[addition]

            # New bond order.
            for fragid in view.bond_orders:
                view.bond_orders[target][fragid] += \
                    view.bond_orders[addition][fragid]
                view.bond_orders[fragid][target] += \
                    view.bond_orders[fragid][addition]
            view.bond_orders[target][target] = 0

            # New charge.
            view.charges[target] += view.charges[addition]

            # Update lookups
            for fragid in list(view.bond_orders):
                del view.bond_orders[fragid][addition]
            del view.bond_orders[addition]
            del posdict[addition]
            del view.purities[addition]
            del view.charges[addition]

            fragment_mapping[target] += [addition] + \
                fragment_mapping[addition]
            del fragment_mapping[addition]

            # Update the fragment lookup
            frag_lookup = [target if x == addition else x
                           for x in frag_lookup]

        return fragment_mapping

    def auto_fragment(self, system, view, cutoff, verbose=False,
                      rand=False, criteria="bondorder"):
        """
        Refragment a system based on the results of a previous
        calculation.

        The auto fragment protocol performs a greedy optimization using
        either the bond order or the distance between fragments as a
        guide. The purity values and the fragment bond orders are
        essential quantities for this calculation, so they can be passed
        if they are already cached.

        By using the `rand` keyword, you can trigger a stochastic
        refragmentation process. If this process is repeated many times,
        you may find a result that improves upon the greedy optimization
        approach.

        Args:
          system (BigDFT.Systems.System): the system to fragment.
          view (BigDFT.Systems.FragmentView): a view of the system.
          cutoff (float): the termination criteria. When the worst
            fragment is more pure than this cutoff, the calculation
            stops.
          verbose (bool): whether to print out as it proceeds.
          rand (bool): whether to activate the stochastic approach.
          criteria (str): either "distance" or "bondorder".

        Returns:
          (dict): a mapping from the old system to the new system where
          each fragment fulfills the purity criteria.
        """
        from copy import deepcopy

        # This deep copy is manually overridden for a view, so it is not
        # too slow.
        new_view = deepcopy(view)

        # Auto Fragment.
        fragment_mapping = self._auto_fragment(system, cutoff, verbose,
                                               rand, new_view, criteria)

        # Rename
        remapping = {}
        for fragid, add_list in fragment_mapping.items():
            new_name = "+".join([fragid] + add_list)
            remapping[new_name] = [fragid] + add_list

        return remapping

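    # A minimal usage sketch for ``auto_fragment`` (hedged; it assumes a
    # fragmented System ``sys`` and the Logfile ``log`` of a completed
    # linear-scaling run, as in ``_example`` below; the cutoff value is
    # illustrative):
    #
    #   btool = BigDFTool()
    #   purity = btool.run_compute_purity(sys, log)
    #   bo = btool.fragment_bond_order(sys, sys.keys(), sys.keys(), log)
    #   charges = {f: sum(at.nel for at in frag)
    #              for f, frag in sys.items()}
    #   view = FragmentView(purity, bo, charges)
    #   mapping = btool.auto_fragment(sys, view, cutoff=0.05)
    #   new_view = view.refragment(mapping)
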
    def fragment_small_molecule(self, sys, view, cutoff=0.05, maxiter=100,
                                verbose=False, seed=0):
        """
        Stochastic algorithm for automatically fragmenting small
        molecules.

        Args:
          sys (BigDFT.Systems.System): the system to fragment.
          view (BigDFT.Systems.FragmentView): a view of the system.
          cutoff (float): the purity cutoff.
          maxiter (int): how many iterations to perform.
          verbose (bool): optionally print out the progress.
          seed (int): a seed value, as this is a stochastic process.

        Returns:
          (dict): a mapping from the original fragments to the new
          fragments.
          (BigDFT.Systems.FragmentView): the new fragment view.
        """
        from numpy import random
        random.seed(seed)

        # Original
        nfrag = 0
        mapping = {x: [x] for x in sys}

        if verbose:
            print("Iteration", "Number of Fragments")

        # Loop
        for i in range(maxiter):
            new_mapping = self.auto_fragment(sys, view, cutoff, rand=True)
            if nfrag < len(new_mapping):
                mapping = new_mapping
                nfrag = len(mapping)
            if verbose:
                print(i, nfrag)

        new_view = view.refragment(mapping)

        return mapping, new_view

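    # A hedged sketch of the stochastic protocol above: repeat the
    # randomized refragmentation and keep the mapping with the most
    # fragments that still satisfies the purity cutoff (``sys`` and
    # ``view`` as in the ``auto_fragment`` sketch; the seed is
    # arbitrary):
    #
    #   mapping, new_view = btool.fragment_small_molecule(
    #       sys, view, cutoff=0.05, maxiter=50, seed=1)
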
    def fragment_mask_matrix(self, sys, mat, fragments, log):
        """
        Sometimes we don't want to store an entire matrix, just the
        parts related to some fragments of interest. This routine will
        mask out a matrix, keeping entries only related to the list of
        fragments provided.

        Args:
          sys (BigDFT.Systems.System): the system associated with the
            matrix.
          mat (scipy.sparse.csr_matrix): the matrix to mask.
          fragments (list): a list of fragment ids to keep.
          log (BigDFT.Logfiles.Logfile): the logfile associated with
            this matrix's calculation.

        Returns:
          (scipy.sparse.csr_matrix): the masked matrix.
        """
        from BigDFT.Systems import System
        from copy import deepcopy
        from scipy.sparse import dok_matrix

        # Build the subsystem containing only the fragments to keep.
        subsys = System()
        for fragid in fragments:
            subsys[fragid] = deepcopy(sys[fragid])
        frag_indices = self.get_frag_indices(subsys, log)
        frag_indices = sum(frag_indices.values(), [])

        # Diagonal projector onto the kept indices.
        imat = dok_matrix(mat.shape)
        for i in frag_indices:
            imat[i, i] = 1

        return imat.dot(mat).dot(imat)

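    # The mask is a diagonal 0/1 projector P over the kept indices, and
    # the result is P * M * P. A hedged sketch (the fragment ids are
    # hypothetical):
    #
    #   k = btool.get_matrix_k(log)
    #   masked = btool.fragment_mask_matrix(sys, k, ["FRA:0", "FRA:1"],
    #                                       log)
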
    def compute_fragment_dos(self, frag, log, ks_coeff, eigvals,
                             frag_indices=None, smat=None,
                             assume_pure=False, **kwargs):
        """
        Compute the partial density of states projected onto a given
        fragment.

        Args:
          frag (BigDFT.Fragments.Fragment): the fragment to project on
            to.
          log (BigDFT.Logfiles.Logfile): the log of the calculation.
          ks_coeff (scipy.sparse.csc_matrix): the matrix of eigenvectors.
          eigvals (list): a list of eigenvalues.
          frag_indices (list): list of indices associated with this
            fragment.
          smat (scipy.sparse.csc_matrix): the overlap matrix.
          assume_pure (bool): an optimization can be performed if we
            assume the target is pure.
          kwargs (dict): any extra options to pass to the DoS
            constructor.

        Returns:
          (BigDFT.DoS.DoS): a density of states object built using the
          partial density of states.
        """
        from BigDFT.Systems import System
        from BigDFT.DoS import DoS

        # Subsystem with just this fragment.
        subsys = System()
        subsys["FRAG:0"] = frag

        # Optional arguments
        if smat is None:
            smat = self.get_matrix_s(log)
        if frag_indices is None:
            frag_indices = self.get_frag_indices(subsys, log)["FRAG:0"]

        subsmat = smat[frag_indices, :]
        subkmat = ks_coeff[frag_indices, :].T

        vals = []
        for n in range(len(eigvals)):
            work = subkmat[n, :].dot(subsmat)
            if assume_pure:
                sumval = work[:, frag_indices].dot(
                    ks_coeff[frag_indices, n])
            else:
                sumval = work[:, :].dot(ks_coeff[:, n])
            vals.append(sumval[0, 0])

        # Construct the DoS object
        dos = DoS(energies=eigvals, norm=[vals], units='AU', **kwargs)

        return dos

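    # A hedged sketch of the fragment PDOS pipeline, reusing the cached
    # overlap matrix (``frag`` is assumed to be one fragment of ``sys``):
    #
    #   ks, evals = btool.get_ks_coeff(log)
    #   smat = btool.get_matrix_s(log)
    #   dos = btool.compute_fragment_dos(frag, log, ks, evals, smat=smat)
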
    def create_layered_qmmm_system(self, system, target, pairwise_bo,
                                   cutoffs, criteria="bondorder",
                                   link_atoms=False):
        """
        Creates a multilayered system suitable for QM/MM calculations.
        For each layer, a suitable QM region is built around it.

        Args:
          system (BigDFT.Systems.System): a System class, already broken
            up into fragments.
          target (str): the name of the fragment to treat as the target
            of the QM/MM run.
          pairwise_bo (dict): pairwise bond order values between all
            fragments in the system.
          cutoffs (list): a list of cutoff values for fragment
            interactions. The number of layers is equal to the number of
            cutoffs.
          criteria (str): how to determine which atoms are included in
            the QM region. Valid choices are "bondorder" and "distance".
          link_atoms (bool): whether to generate link atoms.

        Returns:
          (list): a list of Systems, one for each QM layer.
          (System): the MM region.
        """
        from copy import deepcopy
        from BigDFT.Systems import System

        # First, we will build the layers using only keys.
        qm_list = []
        qm, mm = self.create_qmmm_system(system, target,
                                         pairwise_bo[target], cutoffs[0],
                                         criteria, link_atoms=False)
        qm_list.append(list(qm.keys()))
        mm_keys = list(mm.keys())

        # To avoid duplicates
        used_list = deepcopy(list(qm.keys()))

        for k in range(1, len(cutoffs)):
            qm_list.append([])
            for subtarget in qm_list[k - 1]:
                qm, mm = self.create_qmmm_system(system, subtarget,
                                                 pairwise_bo[subtarget],
                                                 cutoffs[k], criteria,
                                                 link_atoms=False)
                qm_list[k] += qm.keys()

            # Remove duplicates
            qm_list[k] = [x for x in qm_list[k] if x not in used_list]
            qm_list[k] = list(set(qm_list[k]))
            used_list = list(set(used_list + qm_list[k]))
            mm_keys = [x for x in mm_keys if x not in qm_list[k]]

        # Next, we will convert those keys into systems.
        qmsys = []
        for j in range(0, len(cutoffs)):
            qmsys.append(System())
            for k in qm_list[j]:
                qmsys[j][k] = deepcopy(system[k])
        mmsys = System()
        for k in mm_keys:
            mmsys[k] = deepcopy(system[k])

        # Add the link atoms.
        if link_atoms:
            # One big system to add the link atoms.
            mergesys = System()
            for k in range(0, len(cutoffs)):
                for fragid, frag in qmsys[k].items():
                    mergesys[fragid] = frag
            linksub, remove = self.generate_link_atoms(system, mergesys)

            # Insert the link atoms into the correct layers.
            for fragid, frag in linksub.items():
                for at in frag:
                    if at.is_link:
                        for k in range(0, len(cutoffs)):
                            if fragid in qmsys[k]:
                                qmsys[k][fragid] += [at]
                                break

        return qmsys, mmsys

    def create_qmmm_system(self, system, target, bond_order, cutoff,
                           criteria="bondorder", link_atoms=False):
        """
        Creates a system suitable for QM/MM calculations.

        Args:
          system (System): a System class, already broken up into
            fragments.
          target (str): the name of the fragment to treat as the target
            of the QM/MM run.
          bond_order (dict): bond order values between the target
            fragment and all other fragments in the system.
          cutoff (float): a cutoff value for fragment interactions.
          criteria (str): how to determine which atoms are included in
            the QM region. Valid choices are "bondorder" and "distance".
          link_atoms (bool): whether to generate link atoms.

        Returns:
          (System): the QM region.
          (System): the MM region.
        """
        from BigDFT.Systems import System
        from BigDFT.Fragments import pairwise_distance
        from copy import deepcopy

        qmsys = System()
        mmsys = deepcopy(system)

        if criteria == "bondorder":
            # Order the fragments by their spillage value
            sort_spill = sorted(bond_order.keys(),
                                key=lambda x: bond_order[x], reverse=True)

            # Iterate until we reach the cutoff
            cumsum = sum(bond_order.values())
            for key in sort_spill:
                qmsys[key] = deepcopy(system[key])
                del mmsys[key]

                # Check cutoff
                cumsum -= bond_order[key]
                if cumsum < cutoff:
                    break
        elif criteria == "distance":
            interaction = {x: pairwise_distance(system[target], system[x])
                           for x in system.keys()}
            for fragid, frag in system.items():
                if interaction[fragid] < cutoff:
                    qmsys[fragid] = deepcopy(frag)
                    del mmsys[fragid]
        else:
            raise ValueError("Criteria must be either distance or "
                             "bondorder")

        # Double check that the target is in qmsys
        # (this can happen if the target is not a pure fragment).
        if target not in qmsys:
            qmsys[target] = system[target]

        if link_atoms:
            qmsys, remove = self.generate_link_atoms(system, qmsys)

        return qmsys, mmsys

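    # A hedged QM/MM sketch: with ``bo`` the full pairwise bond orders
    # (see ``fragment_bond_order``), build a QM region around a
    # hypothetical target fragment "FRA:0" by accumulating the strongest
    # interactions until the residual sum falls below the cutoff:
    #
    #   qmsys, mmsys = btool.create_qmmm_system(
    #       sys, "FRA:0", bo["FRA:0"], cutoff=0.01, criteria="bondorder")
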
    def fragment_bond_order(self, sys, fraglist1, fraglist2, log,
                            kxs=None, frag_indices=None):
        """
        Computes the "bond order" between two sets of fragments using
        the method of Mayer. For two atomic fragments, this would
        describe the bond multiplicity of the covalent bond.

        Args:
          sys (BigDFT.Systems.System): the system containing the
            fragments of interest.
          fraglist1 (list): a list of fragments to compute the bond
            order of.
          fraglist2 (list): a list of fragments to compute the bond
            order between.
          log (BigDFT.Logfiles.Logfile): the log describing a
            calculation.
          kxs (scipy.sparse.csc_matrix): the matrix K*S, which might
            already be computed to reduce I/O time.
          frag_indices (dict): the matrix indices associated with each
            fragment.

        Returns:
          (dict): a dictionary of dictionaries, mapping the bond order
          of each fragment in fraglist1 to each fragment in fraglist2.
        """
        from scipy.sparse import csr_matrix, csc_matrix

        # Get the metadata
        if kxs is None:
            kxs = self.get_matrix_kxs(log)
        if frag_indices is None:
            frag_indices = self.get_frag_indices(sys, log)

        slist1 = sorted(fraglist1, key=lambda x: min(frag_indices[x]))
        slist2 = sorted(fraglist2, key=lambda x: min(frag_indices[x]))

        bond_orders = units_quadratic_traces(slist1, slist2, frag_indices,
                                             csc_matrix(kxs),
                                             csr_matrix(kxs), 0.5, 0.5)

        return bond_orders

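    # For reference, with the 0.5 prefactors passed to
    # ``units_quadratic_traces`` the quantity computed above for a pair
    # of fragments A, B reduces to the Mayer-like trace (a hedged
    # transcription of the traces taken in ``units_quadratic_traces``):
    #
    #   B(A, B) = 0.25 * Tr[ (KS)_AB (KS)_BA ]
    #
    #   bo = btool.fragment_bond_order(sys, sys.keys(), sys.keys(), log)
    #   bo["FRA:0"]["FRA:1"]  # bond multiplicity between two fragments
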
    def fragment_interaction_energy(self, sys, fraglist1, fraglist2, log,
                                    frag_indices=None, sinvh=None,
                                    kxs=None):
        """
        Compute the interaction energies between two sets of fragments.

        Args:
          sys (BigDFT.Systems.System): the system containing the
            fragments of interest.
          fraglist1 (list): a list of fragments to compute the
            interaction energy of.
          fraglist2 (list): a list of fragments to compute the
            interaction energy between.
          log (BigDFT.Logfiles.Logfile): the log describing a
            calculation.
          frag_indices (dict): the matrix indices associated with each
            fragment.
          sinvh (scipy.sparse.csc_matrix): the matrix S^{-1}*H, which
            might already be computed to reduce I/O time.
          kxs (scipy.sparse.csc_matrix): the matrix K*S, which might
            already be computed to reduce I/O time.

        Returns:
          (dict): a dictionary of dictionaries, mapping each fragment
          pair to its projected interaction energy.
        """
        from scipy.sparse import csr_matrix, csc_matrix

        # Get the metadata
        if kxs is None:
            kxs = self.get_matrix_kxs(log)
        if sinvh is None:
            sinvh = self.get_matrix_sinvh(log)
        if frag_indices is None:
            frag_indices = self.get_frag_indices(sys, log)

        interaction_energy = units_quadratic_traces(fraglist1, fraglist2,
                                                    frag_indices,
                                                    csc_matrix(sinvh),
                                                    csr_matrix(kxs))

        return interaction_energy

    def fragment_population(self, sys, log, frag_indices=None, kxs=None):
        """
        Performs Mulliken population analysis on each fragment, in case
        charges haven't been computed by doing a multipole analysis.

        Args:
          sys (BigDFT.Systems.System): the system to compute the
            population of.
          log (BigDFT.Logfiles.Logfile): the log describing a
            calculation.
          frag_indices (dict): the matrix indices associated with each
            fragment.
          kxs (scipy.sparse.csc_matrix): the matrix K*S, which might
            already be computed to reduce I/O time.

        Returns:
          (dict): a mapping from fragment ids to charges.
        """
        from numpy import trace

        if kxs is None:
            kxs = self.get_matrix_kxs(log)
        if frag_indices is None:
            frag_indices = self.get_frag_indices(sys, log)

        charges = {}
        for fragid in sys:
            smat = kxs[:, frag_indices[fragid]]
            smat = smat[frag_indices[fragid], :]
            charges[fragid] = sum(x.nel for x in sys[fragid]) - \
                trace(smat.todense())
            charges[fragid] = charges[fragid].item()  # remove numpy

        return charges

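    # Usage sketch (hedged): per-fragment Mulliken charges, obtained by
    # subtracting Tr[(KS)_FF] from each fragment's valence electron
    # count:
    #
    #   population = btool.fragment_population(sys, log)
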
    def run_compute_purity(self, system, log, kxs=None, frag_list=None,
                           frag_indices=None):
        """
        Compute the purity values of the different system fragments.

        Note that this can also be computed using the fragment
        multipoles, but this provides an implementation for when you
        don't need those values.

        Args:
          system (System): instance of a System class, which defines the
            fragmentation.
          log (Logfile): logfile from the run computed on this system.
          kxs (scipy.sparse.csc_matrix): the matrix K*S, which might
            already be computed to reduce I/O time.
          frag_list (list): we can also compute the purity values of
            only some selected fragments.
          frag_indices (dict): the indices of the matrix associated with
            each fragment. This can be precomputed and passed.

        Returns:
          (dict): for each fragment id, what is the purity value.
        """
        from scipy.sparse import csc_matrix

        # Handle the optional parameters
        if frag_list is None:
            working_sys = system
        else:
            working_sys = {k: system[k] for k in frag_list}
        if kxs is None:
            kxs = self.get_matrix_kxs(log)

        # Get the metadata
        if frag_indices is None:
            fidx = self.get_frag_indices(working_sys, log)
        else:
            fidx = {x: frag_indices[x] for x in working_sys}

        # Compute the purity array
        charges = {}
        for fragid, frag in working_sys.items():
            charges[fragid] = sum(x.nel for x in frag)
        purity_array = _compute_purity_values(csc_matrix(kxs), charges,
                                              fidx)

        return purity_array

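    # Usage sketch (hedged): purity indicators close to zero mark
    # fragments that are good, well-separated units; the 0.05 threshold
    # is illustrative:
    #
    #   purity = btool.run_compute_purity(sys, log)
    #   impure = [f for f, p in purity.items() if abs(p) > 0.05]
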
    def get_frag_indices(self, sys, log):
        """
        Compute a lookup table of matrix indices for each fragment in a
        system.

        Args:
          sys (System): instance of a System class, which defines the
            fragmentation.
          log (Logfile): logfile from the run computed on this system.

        Returns:
          (dict): a mapping of fragment ids to lists of indices.
        """
        from BigDFT.Spillage import MatrixMetadata
        from os.path import join

        data_dir = _get_datadir(log)
        metadatafile = join(data_dir, "sparsematrix_metadata.dat")

        metadata = MatrixMetadata(metadatafile)
        frag_indices = metadata.get_frag_indices(sys)

        return frag_indices

    def get_ks_coeff(self, log):
        """
        Retrieve the Kohn-Sham coefficients and the eigenvalues in
        matrix form.

        Args:
          log (Logfile): instance of a Logfile class.

        Returns:
          (scipy.sparse.csc_matrix): the matrix of coefficients.
          (list): the list of eigenvalues.
        """
        from os.path import join
        from scipy.sparse import coo_matrix, csc_matrix

        # Define the input files.
        data_dir = _get_datadir(log)
        infile = join(data_dir, "KS_coeffs.bin")

        evals = []
        rows = []
        cols = []
        vals = []
        with open(infile) as ifile:
            nspin, ntmb, nfvctr = [int(x)
                                   for x in next(ifile).split()[:3]]
            # Eigenvalues
            for i in range(0, nfvctr):
                evals.append(float(next(ifile).split()[0]))
            # Coefficients
            for line in ifile:
                coeff, j, i = line.split()[:3]
                rows.append(int(j) - 1)
                cols.append(int(i) - 1)
                vals.append(float(coeff))

        mat = coo_matrix((vals, (rows, cols)), shape=(ntmb, nfvctr))

        return csc_matrix(mat), evals

    def get_matrix_kxs(self, log):
        """
        Computes the matrix K*S, the Mulliken version of the density
        matrix, and loads it into memory.

        Args:
          log (Logfile): instance of a Logfile class.

        Returns:
          (scipy.sparse.csc_matrix): the matrix K*S.
        """
        from BigDFT.Spillage import serial_compute_puritybase
        from scipy.sparse import csc_matrix
        from scipy.io import mmread, mmwrite
        from os.path import join

        def generate_kxs(kxsfile, k_and_s):
            kfile, sfile = k_and_s
            # Then compute with python
            kxs = serial_compute_puritybase(sfile.obj, kfile.obj)
            mmwrite(kxsfile, kxs)

        kxsfile = join(_get_datadir(log), 'kxs_sparse.mtx')
        try:
            kxs = self.derived_quantity(kxsfile, log,
                                        ['density_kernel', 'overlap'],
                                        generate_kxs)
        except IOError:
            kxs = self.derived_quantity(kxsfile, log,
                                        ['density_kernel_sparse',
                                         'overlap_sparse'],
                                        generate_kxs)
        # Validate the node and read the matrix
        kxs.validate()
        return csc_matrix(mmread(kxsfile))

    def derived_quantity(self, name, log, files, generator):
        """
        Defines a quantity that can be derived from the ccs files in the
        ``files`` list. Requires the generator function.
        """
        from futile.Utils import Node, more_recent_than_parent
        from os.path import join, basename

        def more_recent_than_logfile(filename):
            logfile = join(log.srcdir, basename(log.label))
            return more_recent_than_parent(filename, logfile)

        def validate_all(h_and_s):
            ok = True
            for m in h_and_s:
                ok = ok and m.validate()
                if not ok:
                    break
            return ok

        def more_recent_than_parents(spillfile, node):
            ok = True
            for m in node:
                ok = ok and more_recent_than_parent(spillfile, m.obj)
                if not ok:
                    break
            return ok

        newfile = join(_get_datadir(log), name)
        # In case there is no need to rerun the generation
        newnode = Node(newfile, valid_if=more_recent_than_logfile)
        if newnode.valid:
            return newnode
        nodefiles = [self.ccs_node(log, f) for f in files]
        node = Node(nodefiles, valid_if=validate_all)
        newnode = Node(newfile)
        newnode.requires(node, generator=generator)
        newnode.valid_if(more_recent_than_parents)
        try:
            node.validate()
        except AttributeError:
            raise IOError()
        return newnode

    def ccs_node(self, log, matrixname):
        """
        Defines the protocol to validate the ccs matrix file.
        """
        from os.path import join, isfile
        from futile.Utils import Node, non_null_size

        data_dir = _get_datadir(log)
        mtxfile = join(data_dir, matrixname + '.mtx')
        if isfile(mtxfile):  # matrix market output
            return Node(mtxfile, valid_if=non_null_size)
        if log.log["lin_general"]["output_mat"] == 4:
            mpifile = join(data_dir, matrixname + ".mpi")
        else:
            mpifile = None
        txtfile = join(data_dir, matrixname + ".txt")
        ccsfile = join(data_dir, matrixname + ".ccs")
        return file_dependency_queue(self, ccsfile, txtfile,
                                     mpifile=mpifile)

    def get_matrix_sinvh(self, log):
        """
        Computes the matrix S^{-1}*H, the Mulliken version of the
        spillage matrix, and loads it into memory.

        Args:
          log (Logfile): instance of a Logfile class.

        Returns:
          (scipy.sparse.csc_matrix): the matrix S^{-1}*H.
        """
        from BigDFT.Spillage import serial_compute_spillagebase
        from scipy.sparse import csc_matrix
        from scipy.io import mmread, mmwrite
        from os.path import join

        def generate_sinvh(spillfile, h_and_s):
            houtfile, soutfile = h_and_s
            # Then compute with python
            sinvh = serial_compute_spillagebase(soutfile.obj,
                                                houtfile.obj)
            mmwrite(spillfile, sinvh)

        spillfile = join(_get_datadir(log), 'sinvh_sparse.mtx')
        try:
            sinvh = self.derived_quantity(spillfile, log,
                                          ['hamiltonian', 'overlap'],
                                          generate_sinvh)
        except IOError:
            sinvh = self.derived_quantity(spillfile, log,
                                          ['hamiltonian_sparse',
                                           'overlap_sparse'],
                                          generate_sinvh)
        # Validate the node and read the matrix
        sinvh.validate()
        return csc_matrix(mmread(spillfile))

    def _get_matrix(self, name, log):
        from os.path import join, isfile
        from BigDFT.Spillage import read_ccs
        from scipy.io import mmread
        from scipy.sparse import csc_matrix

        # Define the input files.
        data_dir = _get_datadir(log)

        # Check for a matrix market file first.
        mtxfile = join(data_dir, name + ".mtx")
        if isfile(mtxfile):
            return csc_matrix(mmread(mtxfile))

        soutfile = join(data_dir, name + ".ccs")

        # Convert if necessary
        if not isfile(soutfile):
            sfile = join(data_dir, name + ".txt")
            # Convert to text format if necessary
            if log.log["lin_general"]["output_mat"] == 4:
                infile = sfile.replace(".txt", ".mpi")
                if not isfile(sfile):
                    self.convert_matrix_format(
                        conversion="binary_to_bigdft",
                        infile=infile, outfile=sfile)
            # Convert to CCS
            self.convert_matrix_format(conversion="bigdft_to_ccs",
                                       infile=sfile, outfile=soutfile)

        return read_ccs(soutfile)

    def get_matrix_s(self, log):
        """
        Read the overlap matrix into memory.

        Args:
          log (Logfile): instance of a Logfile class.

        Returns:
          (scipy.sparse.csc_matrix): the matrix S.
        """
        return self._get_matrix("overlap_sparse", log)

    def get_matrix_h(self, log):
        """
        Read the hamiltonian matrix into memory.

        Args:
          log (Logfile): instance of a Logfile class.

        Returns:
          (scipy.sparse.csc_matrix): the matrix H.
        """
        return self._get_matrix("hamiltonian_sparse", log)

    def get_matrix_k(self, log):
        """
        Read the density matrix into memory.

        Args:
          log (Logfile): instance of a Logfile class.

        Returns:
          (scipy.sparse.csc_matrix): the matrix K.
        """
        return self._get_matrix("density_kernel_sparse", log)

    def _invoke_command(self, command, **kwargs):
        from futile.Utils import option_line_generator
        _system_command(command, option_line_generator(**kwargs))


def file_dependency_queue(btool, ccsfile, txtfile, mpifile=None):
    from futile.Utils import Node, non_null_size, more_recent_than_parent

    def binary_to_bigdft(node, parent):
        btool.convert_matrix_format(conversion="binary_to_bigdft",
                                    infile=parent, outfile=node)

    def bigdft_to_ccs(node, parent):
        btool.convert_matrix_format(conversion="bigdft_to_ccs",
                                    infile=parent, outfile=node)

    if mpifile is not None:
        mpi = Node(mpifile, valid_if=non_null_size)
        txt = Node(txtfile)
        txt.requires(mpi, generator=binary_to_bigdft)
        txt.valid_if(more_recent_than_parent)
    else:
        txt = Node(txtfile, valid_if=non_null_size)
    ccs = Node(ccsfile)
    ccs.requires(txt, generator=bigdft_to_ccs)
    ccs.valid_if(more_recent_than_parent)
    return ccs


def _compute_purity_values(kxs, charges, frag_indices):
    """
    Computes the actual purity values.

    Args:
      kxs (scipy.sparse.csc_matrix): the matrix K*S.
      charges (dict): the charge to divide by for each fragment.
      frag_indices (dict): indices associated with each fragment.

    Returns:
      (dict): purity of each fragment.
    """
    from numpy import trace

    purity_values = {}
    for idx in frag_indices:
        indices = frag_indices[idx]
        smat = kxs[:, indices]
        smat = 0.5 * smat[indices, :]
        purity_values[idx] = 2.0 * trace(
            (smat.dot(smat) - smat).todense()) / charges[idx]
        purity_values[idx] = purity_values[idx].item()  # remove numpy

    return purity_values

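# For reference (a hedged transcription of ``_compute_purity_values``):
# with S_F = (KS)_FF / 2 restricted to the indices of fragment F, the
# indicator computed above is
#
#   Pi_F = (2 / q_F) * Tr( S_F^2 - S_F )
#
# which vanishes when the projected density matrix is idempotent, i.e.
# when the fragment is "pure".
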

def superunits_purities(bo, pv, q_units, superunits, q_superunits):
    """
    Compute the purity values of the superunits, described as a
    unification of the units.

    Args:
        bo (matrix-like): fragment bond orders of the units.
        pv (array-like): purity values of the units.
        q_units (array-like): charges of the units.
        superunits (dict): lookup dictionary containing the list of
            units per superunit.
        q_superunits (array-like): charges of the superunits.

    Returns:
        dict: purities of the superunits.
    """
    purities = {}
    for fragid, atlist in superunits.items():
        purity = 0
        # Sum the bond orders
        for a1 in atlist:
            for a2 in atlist:
                if a1 != a2:
                    purity += bo[a1][a2]
        # Sum the un-normalized atomic purity values
        for a in atlist:
            charge = q_units[a]
            purity += pv[a] * charge / 2.0
        # Normalize
        purity *= 2.0 / q_superunits[fragid]
        purities[fragid] = purity
    return purities


def units_quadratic_traces(unitlist1, unitlist2, frag_indices, mat1, mat2,
                           a1=1.0, a2=1.0):
    from numpy import einsum

    # Double loop over the pairs to compute.
    interaction = {}
    for fid1 in unitlist1:
        interaction[fid1] = {}
        idx1 = frag_indices[fid1]
        lmat1 = mat1[:, idx1].todense()
        rmat1 = mat2[idx1, :].todense()
        for fid2 in unitlist2:
            idx2 = frag_indices[fid2]
            # Optimization for the case where the indices are
            # consecutive values.
            sidx2 = sorted(idx2)
            mindx = min(sidx2)
            maxdx = max(sidx2)
            if len(sidx2) == maxdx - mindx + 1:
                lmat2 = lmat1[mindx:maxdx + 1, :]
                rmat2 = rmat1[:, mindx:maxdx + 1]
            else:
                lmat2 = lmat1[idx2, :]
                rmat2 = rmat1[:, idx2]
            interaction[fid1][fid2] = a1 * a2 * einsum("ij,ji->",
                                                       lmat2, rmat2)
            # Remove numpy type
            interaction[fid1][fid2] = interaction[fid1][fid2].item()
    return interaction


def superunits_quadratic_quantities(bo, superunits):
    """
    Compute quantities that transform like the bond orders of the
    superunits, described as a unification of the units.

    Args:
        bo (matrix-like): quantities like the fragment bond orders of
            the units.
        superunits (dict): lookup dictionary containing the list of
            units per superunit.

    Returns:
        dict: quantities of the superunits.
    """
    bo_new = {}
    for f1, atlist1 in superunits.items():
        for f2, atlist2 in superunits.items():
            order = 0
            for a1 in atlist1:
                for a2 in atlist2:
                    order += bo[a1][a2]
            bo_new.setdefault(f1, {})[f2] = order
    return bo_new


def systems_heatmap(data, restrict_to=None, axs=None, columns=None,
                    **kwargs):
    """
    Create a heatmap for a set of systems.

    Args:
        data (dict): a dictionary mapping system names to another
            dictionary which maps fragment ids to property values.
        restrict_to (list): a list of lists saying which fragments to
            draw. It is a list of lists instead of just a list so that
            a separator can be drawn between groups of values (for
            example, when making a jump in the sequence).
        axs (matplotlib.axis): the axis to draw on.
        columns (list): the order of the columns on the x axis.
        kwargs (dict): any extra arguments to pass to matplotlib's
            imshow command.

    Returns:
        a reference to the imshow value.
    """
    from numpy import zeros
    import matplotlib.pyplot as plt
    from mpl_toolkits.axes_grid1 import make_axes_locatable

    # Handle optional axis
    if axs is None:
        fig, axs = plt.subplots(1, 1, figsize=(12, 8))

    # Handle the restrict_to option.
    if restrict_to is not None:
        keylist = [x for y in restrict_to for x in y]
    else:
        key = list(data.keys())[0]
        keylist = list(data[key])
    cols = columns if columns is not None else data.keys()

    # Create the matrix of values and plot it.
    mat = zeros((len(keylist), len(cols)))
    for i, keyi in enumerate(cols):
        for j, keyj in enumerate(keylist):
            mat[j, i] = data[keyi][keyj]
    im = axs.imshow(mat, **kwargs)

    # Labels
    if restrict_to is not None:
        tickloc = []
        ticklab = []
        i = 0
        for x in restrict_to:
            for y in x:
                tickloc.append(i)
                ticklab.append(y)
                i += 1
            if x == restrict_to[-1]:
                break
            tickloc.append(i - 0.49)
            ticklab.append("-------")
        axs.set_yticks(tickloc)
        axs.set_yticklabels(ticklab, family='monospace')
    else:
        axs.set_yticks(range(mat.shape[0]))
        axs.set_yticklabels(keylist, family='monospace')
    axs.set_xticks(range(mat.shape[1]))
    axs.set_xticklabels(list(cols))
    for tick in axs.get_xticklabels():
        tick.set_rotation(75)

    # Draw the colorbar
    divider = make_axes_locatable(axs)
    cax = divider.append_axes("right", size="2%", pad=0.15)
    plt.colorbar(im, cax=cax)

    # Draw lines that separate out the boxes.
    for i in range(mat.shape[0] - 1):
        axs.axhline(i + 0.49, color='k', linestyle='-', linewidth=0.5)
    for i in range(mat.shape[1] - 1):
        axs.axvline(i + 0.49, color='k', linestyle='-', linewidth=0.5)

    # Not having a fixed aspect is good for larger plots.
    axs.set_aspect("auto")

    return im

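# Usage sketch (hedged; the data values are hypothetical): one heatmap
# column per system, one row per fragment:
#
#   import matplotlib.pyplot as plt
#   data = {"sysA": {"FRA:0": 0.01, "FRA:1": 0.08},
#           "sysB": {"FRA:0": 0.02, "FRA:1": 0.05}}
#   im = systems_heatmap(data, cmap="viridis")
#   plt.show()
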

def dict_distplot(system_dict, ax=None, reuse_ticks=False, kind='violin',
                  **kwargs):
    """
    Represent a violin or box plot from a dictionary of values.

    Args:
        system_dict (dict): dictionary mapping the labels to plot to
            their values.
        ax (matplotlib.pyplot.axis): the axis to plot on.
        reuse_ticks (bool): employ the existing xticks of the axis to
            plot the data.
        kind (str): 'violin' for a violinplot, 'box' for a boxplot.
        **kwargs: any other arguments to be passed to the violin/boxplot.

    Returns:
        tuple: the axis of the plot and the object returned from the
        violin/boxplot.
    """
    from matplotlib import pyplot as plt
    from numpy import nan

    def get_keys_and_values(ax, dd, from_scratch):
        keys = [lb.get_text() for lb in ax.get_xticklabels()]
        if sum(len(k) for k in keys) == 0 or from_scratch:
            keys = dd.keys()
        else:
            keys += [k for k in sorted(dd.keys()) if k not in keys]
        values = [dd.get(k, len(dd[list(dd.keys())[0]]) * [nan])
                  for k in keys]
        return keys, values

    from_scratch = False
    if ax is None:
        fig, ax = plt.subplots()
        from_scratch = True
    else:
        from_scratch = not reuse_ticks or (not ax.lines and
                                           not ax.collections)
    keys, values = get_keys_and_values(ax, system_dict, from_scratch)
    if kind == 'violin':
        ret = ax.violinplot(values, **kwargs)
    elif kind == 'box':
        ret = ax.boxplot(values, **kwargs)
    ax.set_xticks(range(1, len(keys) + 1))
    ax.set_xticklabels(keys, rotation=90)
    return ax, ret

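# Usage sketch (hedged; the values are hypothetical): compare the
# distribution of a per-fragment quantity across labelled groups:
#
#   vals = {"groupA": [0.10, 0.20, 0.15], "groupB": [0.30, 0.25, 0.35]}
#   ax, ret = dict_distplot(vals, kind='box')
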

def _example():
    """Post-processing example."""
    from BigDFT.Systems import System, FragmentView
    from BigDFT.Fragments import Fragment
    from BigDFT.IO import XYZReader
    from BigDFT.Calculators import SystemCalculator
    from BigDFT.Inputfiles import Inputfile
    from scipy.linalg import eigh
    from copy import deepcopy

    # Create a system.
    sys = System()
    sys["FRA:0"] = Fragment()
    with XYZReader("CH2") as ifile:
        for at in ifile:
            sys["FRA:0"].append(at)
    sys["FRA:1"] = Fragment()
    with XYZReader("CH3") as ifile:
        for at in ifile:
            sys["FRA:1"].append(at)
    sys["FRA:1"].translate([0, 0, -3])
    sys["FRA:2"] = deepcopy(sys["FRA:0"])
    sys["FRA:2"].translate([0, 0, 3])
    sys["FRA:2"].rotate(y=150, units="degrees")
    sys["FRA:3"] = deepcopy(sys["FRA:0"])
    sys["FRA:3"].translate([3, 0, 1.5])
    print(list(sys))

    # Run a calculation in the linear mode.
    inp = Inputfile()
    inp.set_xc("PBE")
    inp.set_hgrid(0.55)
    inp.set_psp_nlcc()
    inp["import"] = "linear"

    code = SystemCalculator()
    code.update_global_options(verbose=False)
    log = code.run(input=inp, posinp=sys.get_posinp(), run_dir="work")
    sys.set_atom_multipoles(log)

    # Create the post-processing tool.
    from BigDFT.PostProcessing import BigDFTool
    btool = BigDFTool()

    # Purity
    purity = btool.run_compute_purity(sys, log)
    print(purity)

    # Charges
    charges = {fragid: sum(at.nel for at in frag)
               for fragid, frag in sys.items()}

    # Bond orders
    bo = btool.fragment_bond_order(sys, sys.keys(), sys.keys(), log)

    # Population values
    population = btool.fragment_population(sys, log)
    print(population)

    # These three things define a fragment view.
    view = FragmentView(purity, bo, charges)

    # Auto fragmentation
    mapping = btool.auto_fragment(sys, view, 0.10)
    print(mapping)

    # This defines a new view.
    new_view = view.refragment(mapping)
    print(new_view.purities)

    # Eigenvalues
    H = btool.get_matrix_h(log)
    S = btool.get_matrix_s(log)
    w = eigh(H.todense(), b=S.todense(), eigvals_only=True)
    print(w)

if __name__ == "__main__": _example()